I felt as though a dynamic plot would be a good vehicle for displaying Colorado’s covid case data - the ability to see both the general trends across counties as well as selecting a specific county for greater detail helps to draw comparisons between regions. This data was sourced from the New York Times’ github repository. I restriced this investigation to the 15 largest (most populated) counties.
# Load libraries
library(ggplot2)
library(htmlwidgets)
library(plotly)
library(tidyr)
library(dplyr)
library(readr)
# Download Covid Data ----
cases_data <- read.csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv",
stringsAsFactors = F)
# Subset Colorado data
co.data <- cases_data[cases_data$state == 'Colorado', ]
# Generate weekly averages
co.rolling.cases <- co.data %>%
arrange(date) %>%
mutate(cases_1day = cases - dplyr::lag(cases,1),
deaths_1day = deaths - dplyr::lag(deaths,1),
cases_7day = zoo::rollmean(cases_1day, 7, fill=NA, align="right"),
deaths_7day = zoo::rollmean(deaths_1day, 7, fill=NA, align="right"))
# 15 largest CO counties (by population)
largest.counties <- c('El Paso',
'Denver',
'Arapahoe',
'Jefferson',
'Adams',
'Larimer',
'Douglas',
'Boulder',
'Weld',
'Pueblo',
'Mesa',
'Broomfield',
'Garfield',
'Eagle',
'La Plata')
co.rolling.cases <- co.rolling.cases[co.rolling.cases$county %in% largest.counties, ]
co.rolling.cases$county <- factor(co.rolling.cases$county, levels = largest.counties)
# Pivot to long-format data and separate cumulative from weekly average data
all.co.long <- co.rolling.cases %>%
select(date, county, cases, deaths) %>%
pivot_longer(c('cases', 'deaths'), names_to = "type", values_to = 'Count') %>%
drop_na()
all.co.long$time <- rep('Cumulative', times = nrow(all.co.long))
week.co.long <- co.rolling.cases %>%
select(date, county, cases_7day, deaths_7day) %>%
pivot_longer(c('cases_7day', 'deaths_7day'), names_to = "type", values_to = 'Count') %>%
drop_na()
week.co.long$type <- gsub("_7day", '', week.co.long$type)
week.co.long$time <- rep('Weekly Average', times = nrow(week.co.long))
day.co.long <- co.rolling.cases %>%
select(date, county, cases_1day, deaths_1day) %>%
pivot_longer(c('cases_1day', 'deaths_1day'), names_to = "type", values_to = 'Count') %>%
drop_na()
day.co.long$type <- gsub("_1day", '', day.co.long$type)
day.co.long$time <- rep('Daily Change', times = nrow(day.co.long))
# Combine data
co.long <- rbind(all.co.long, week.co.long, day.co.long)
# Ensure date is in correct format and specify time factor order
co.long$date <- as.Date(co.long$date)
co.long$time <- factor(co.long$time, levels = c('Cumulative', 'Weekly Average', 'Daily Change'))
co.long$type <- ifelse(co.long$type == 'cases', 'Cases', 'Deaths')
# Generate figure with ggplot
co.ggp <- ggplot(co.long, aes(x = date, y = Count)) +
geom_line(aes(color = county)) +
theme_minimal(base_size = 12) +
facet_grid(type ~ time, scales = 'free_y') +
scale_x_date(breaks = "1 year",
minor_breaks = "6 months",
date_labels = "%Y",
limits=c(as.Date("2020-01-01"), NA)) +
scale_color_viridis_d() +
labs(color = 'County')
# Convert to Plotly figures
co.ptly <- ggplotly(co.ggp) %>%
layout(autosize = T)